In [ ]:
from __future__ import division
import codecs
import pickle
import networkx as nx
from collections import Counter

# Notebook-wide matplotlib defaults applied to every figure created below.
# NOTE(review): `rcParams` is not imported in the visible cells - presumably it
# comes from a `%pylab` session; confirm before running on a fresh kernel.
rcParams['figure.figsize'] = (12.0, 10.0)  # (width, height)
rcParams['font.family'] = 'Times New Roman'

In [ ]:
from os.path import abspath, dirname

# Parent of the directory the notebook is run from; every data path below is
# built by appending to this string.
# `dirname` replaces the former split/join on a literal '/', which produced an
# empty string on platforms whose path separator is not '/' and at the
# filesystem root.
workspace = dirname(abspath('.'))

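Note: Make sure that `workspace` resolves to the root directory of openie_eval. It is computed as the parent of the current working directory, so start the notebook from a directory directly below that root.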


In [ ]:
from openie_eval.openie_eval import semantic_parsing as sp
from openie_eval.openie_eval import ontologization
# Re-import both project modules so that edits made to them on disk after the
# first import are picked up without restarting the kernel.
# NOTE(review): bare `reload` is a builtin on Python 2 only.
reload(sp)
reload(ontologization)

from nltk.stem import WordNetLemmatizer
# Single shared lemmatizer; a later cell uses it to reduce relation phrases to
# their verb lemma (pos='v') before counting them.
lemmatizer = WordNetLemmatizer()

In [ ]:
# Domain under analysis; it selects every ground-truth and result file below.
keyword = 'carnatic_music'

# Ground-truth page titles, one per line, normalised to stripped lower case.
# The file is read inside a `with` block so the handle is closed right away.
pages_path = workspace + '/data/ground-truth/'+keyword+'_pages.txt'
with codecs.open(pages_path, encoding='utf-8') as pages_file:
    wiki_entities = pages_file.readlines()
wiki_entities = [line.strip().lower() for line in wiki_entities]

# Extraction methods being compared, their display names, and their bar
# colours (colours are matched to methods by position).
methods = ['reverb', 'openie', 'semantic-parsing']
labels = {'reverb': 'ReVerb', 'openie': 'OpenIE 4.0', 'semantic-parsing': 'Sem. Parsing'}
colors = ['#990033', '#006600', '#330066']

# Toggle: suffix inserted into the relation pickle file name.
#coref_suffix = ''
coref_suffix = '-coref'

# Toggle: not referenced by any other visible cell (the relation loader
# hard-codes '-filtered' instead).
filtered_suffix = ''
#filtered_suffix = '-filtered'

# NOTE: pickle.load can execute arbitrary code; only load files you trust.
# Opened in binary mode (what pickle expects) and closed deterministically.
rules_path = workspace + '/data/results/qualitative/entity-identification/rule-based/'+keyword+'/rules.pickle'
with open(rules_path, 'rb') as rules_file:
    rules = pickle.load(rules_file)
# Ground truth keyed by class type; its keys drive every per-class loop below.
groundtruth = ontologization.load_groundtruth(keyword, rules.keys())

Valid relation types


In [ ]:
#class-wise
# Number of distinct, non-empty relation types per method for every concept
# class; each list is filled in `groundtruth.keys()` order.
method_counts = {method: [] for method in methods}

# Column that holds each method's relation types in the per-class TSV files.
method_columns = [('reverb', 0), ('openie', 1), ('semantic-parsing', 2)]

for class_type in groundtruth.keys():
    tsv_file = workspace + '/data/results/qualitative/semantic-relation-extraction/'+class_type+'.tsv'
    # ndmin=2 keeps a file with a single row two-dimensional; without it
    # loadtxt squeezes the result to 1-D and the column slices below raise
    # IndexError.
    data = loadtxt(tsv_file, delimiter='\t', dtype='str', ndmin=2)
    if len(data) == 0:
        # No rows for this class: record a zero for every method.
        for method in methods:
            method_counts[method].append(0)
        continue
    for method, column in method_columns:
        # Count the distinct entries of the column, ignoring blank cells.
        method_counts[method].append(sum(unique(data[:, column]) != ''))

In [ ]:
#all-together
# Append the figure for the whole domain as the last entry of each method's
# list. Because this appends to the existing lists, running the cell more than
# once adds duplicate entries.
tsv_file = workspace + '/data/results/qualitative/semantic-relation-extraction/'+keyword+'.tsv'
data = loadtxt(tsv_file, delimiter='\t', dtype='str')

# NOTE(review): the offsets 7 and 6 are not explained anywhere in the visible
# cells - confirm what they compensate for.
n_subtract = 7 if keyword == 'carnatic_music' else 6

for method, column in (('reverb', 0), ('openie', 1), ('semantic-parsing', 2)):
    # Distinct non-empty entries of this method's column, minus the offset.
    distinct_nonempty = sum(unique(data[:, column]) != '')
    method_counts[method].append(distinct_nonempty-n_subtract)

In [ ]:
# Display the collected counts for inspection (method -> list of counts).
method_counts

In [ ]:
# Grouped bar chart of the number of relation types per concept, with one bar
# per extraction method in every group.
rcParams['figure.figsize'] = (12.0, 10.0)
fig, ax = plt.subplots()

bar_width = 0.2
# One group per ground-truth class plus a final group for the overall count.
index = arange(len(groundtruth.keys())+1)

for position, method in enumerate(methods):
    # align='edge' pins the left edge of each bar at the given x, so the bars
    # of a group sit side by side and the ticks below stay centred even on
    # matplotlib releases whose default is align='center'.
    bar(index + position*bar_width, method_counts[method], width=bar_width,
        color=colors[position], label=labels[method], align='edge')

fontsize = 30
xlabel('Concepts', fontsize=fontsize+2)
ylabel('#Relation types', fontsize=fontsize+2)

# The first characters of every class name are cut off for the tick labels.
# NOTE(review): 9 and 11 are hard-coded lengths - presumably the length of a
# domain prefix such as 'carnatic_'; confirm against the class names.
prefix_length = 9 if keyword == 'carnatic_music' else 11
tick_labels = [name[prefix_length:] for name in groundtruth.keys()] + ['all']

# Place each tick under the middle of its group of bars.
group_centres = index + len(methods)*bar_width/2.0
xticks(group_centres, tick_labels, fontsize=fontsize, rotation=18)
yticks(fontsize=fontsize)

legend(prop={'size': fontsize}, loc='upper left',
       fancybox=True)

In [ ]:
# Write the current figure as both PDF and PNG, using identical options.
fname = workspace + '/data/results/qualitative/semantic-relation-extraction/'+keyword
save_options = dict(dpi=200, facecolor='w', edgecolor='w', orientation='landscape',
                    papertype=None, format=None, transparent=False,
                    bbox_inches='tight', pad_inches=0.1)
for extension in ('.pdf', '.png'):
    savefig(fname+extension, **save_options)

In [ ]:
# Close every open matplotlib figure.
close('all')

Assertions using those relation types over entities


In [ ]:
#class-wise
# Distinct relation strings per method and concept class, stored as
# method -> {class_type: array}. Classes whose file has no rows get no entry.
method_rels = {method: {} for method in methods}

for class_type in groundtruth.keys():
    tsv_file = workspace + '/data/results/qualitative/semantic-relation-extraction/'+class_type+'.tsv'
    # ndmin=2 keeps a file with a single row two-dimensional; without it
    # loadtxt squeezes the result to 1-D and the column slices below raise
    # IndexError.
    data = loadtxt(tsv_file, delimiter='\t', dtype='str', ndmin=2)
    if len(data) == 0:
        continue
    for method, column in (('reverb', 0), ('openie', 1), ('semantic-parsing', 2)):
        # Blank cells, if any, are kept as an empty-string entry here.
        method_rels[method][class_type] = unique(data[:, column])

In [ ]:
# Display the relation strings collected per method and class for inspection.
method_rels

In [ ]:
# Number of extracted assertions per method and concept class: an assertion is
# counted for a class when its normalised relation equals one of the relation
# strings recorded for that class. Lists follow `groundtruth.keys()` order.
method_counts = {method: [] for method in methods}

for method, reldata in method_rels.items():
    # NOTE(review): '-filtered' is hard-coded here although a `filtered_suffix`
    # setting exists - confirm which one is intended.
    # NOTE: pickle.load can execute arbitrary code; only load files you trust.
    relations_path = workspace+'/data/'+method+'/'+keyword+'/relations'+coref_suffix+'-filtered.pickle'
    # Binary mode is what pickle expects; `with` closes the handle promptly.
    with open(relations_path, 'rb') as relations_file:
        relations = pickle.load(relations_file)
    # Normalise every assertion to [arg1, relation, arg2]: all lower case, with
    # the relation reduced to its verb lemma.
    relations = [[entry['arg1'].lower(), lemmatizer.lemmatize(entry['rel'].lower(), pos='v'), entry['arg2'].lower()]
                 for entry in relations]

    # Tally the assertions per relation once, instead of rescanning the whole
    # list for every relation type of every class.
    assertions_per_relation = Counter(triple[1] for triple in relations)

    for class_type in groundtruth.keys():
        if class_type not in reldata:
            # No relation types were recorded for this class.
            method_counts[method].append(0)
            continue
        class_count = 0
        for rel in reldata[class_type]:
            # A Counter yields 0 for relations that never occur.
            class_count += assertions_per_relation[rel]
        method_counts[method].append(class_count)

In [ ]:
# Grouped bar chart of the number of assertions per concept, with one bar per
# extraction method in every group.
rcParams['figure.figsize'] = (12.0, 10.0)
fig, ax = plt.subplots()

bar_width = 0.2
# One group per ground-truth class.
index = arange(len(groundtruth.keys()))

for position, method in enumerate(methods):
    # align='edge' pins the left edge of each bar at the given x, so the bars
    # of a group sit side by side and the ticks below stay centred even on
    # matplotlib releases whose default is align='center'.
    bar(index + position*bar_width, method_counts[method], width=bar_width,
        color=colors[position], label=labels[method], align='edge')

fontsize = 30
xlabel('Concepts', fontsize=fontsize+2)
ylabel('#Assertions', fontsize=fontsize+2)

# The first characters of every class name are cut off for the tick labels.
# NOTE(review): 9 and 11 are hard-coded lengths - presumably the length of a
# domain prefix such as 'carnatic_'; confirm against the class names.
prefix_length = 9 if keyword == 'carnatic_music' else 11
tick_labels = [name[prefix_length:] for name in groundtruth.keys()]

# Place each tick under the middle of its group of bars.
group_centres = index + len(methods)*bar_width/2.0
xticks(group_centres, tick_labels, fontsize=fontsize, rotation=18)
yticks(fontsize=fontsize)

# The legend is anchored above the plotting area.
legend(prop={'size': fontsize}, loc='upper center',
       bbox_to_anchor=(0.5, 1.18), fancybox=True)

In [ ]:
# Fix the y-axis range of the current axes.
# NOTE(review): 245 is a hard-coded upper limit - presumably tuned to the
# current data; confirm whenever the inputs change.
ylim(0, 245)

In [ ]:
# Write the current figure as both PDF and PNG, using identical options.
fname = workspace+'/data/results/qualitative/semantic-relation-extraction/'+keyword+'-relcount'
save_options = dict(dpi=200, facecolor='w', edgecolor='w', orientation='landscape',
                    papertype=None, format=None, transparent=False,
                    bbox_inches='tight', pad_inches=0.1)
for extension in ('.pdf', '.png'):
    savefig(fname+extension, **save_options)

In [ ]:
# Close every open matplotlib figure.
close('all')